About this file Data Fields:

library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(car)
Loading required package: carData

Attaching package: ‘car’

The following object is masked from ‘package:dplyr’:

    recode
library(MASS)

Attaching package: ‘MASS’

The following object is masked from ‘package:dplyr’:

    select
library(repr)

library(pals)
ecars_raw = read.csv('EV_cars.csv')
ecars_raw
NA
#rename some of the columns
ecars_raw = ecars_raw %>% rename(Price = Price.DE., Acceleration = acceleration..0.100.)

# extract the Make of each car into its own column
make = strsplit(ecars_raw$Car_name, split = ' ')

make_ = c()
n = length(make)

for (i in 1:n) {
  make_[i] = make[[i]][1]
}

ecars_raw$Make = make_
# move columns so continuous variables are together 
ecars_raw = ecars_raw %>% relocate(Make, .before = Car_name_link)
ecars_raw = ecars_raw %>% relocate(Battery, .after = Car_name_link)
ecars_raw
ecars_raw = ecars_raw %>% filter(!is.na(Fast_charge))
ecars = ecars_raw %>% filter(!is.na(Price))
ecars_missing_price = ecars_raw %>% filter(is.na(Price))

This data required minimal processing. I created a Make variable by extracting the first word from the Car_name variable. I also renamed several columns to make them more intuitive, for example acceleration..0.100. to Acceleration. I removed the two cars that did not have Fast Charge capability (the Renault Twingo Electric and the e.Go e.wave X) because this was an important feature in the linear regression and was impacting their price. Finally, I made sure all the continuous variables were next to each other to simplify calling them. I split the data frame into two: one with prices (307 objects) and one with missing prices (51 objects).

After cleaning the data, 45 unique car makes were included in the ecars data used to create the linear model, and 22 unique car makes were included in the data with missing prices. Additionally, the 14 makes that have 10 or more car models are highlighted throughout the project.

length(ecars$Price)
[1] 307
ecars
library(tibble)

ggplot(ecars, aes(Price, y = factor(0))) +
  geom_boxplot()+
  theme(axis.title.y=element_blank(),axis.text.y=element_blank(),axis.ticks.y=element_blank())

NA
NA
NA
Noecars = ecars

Noecars$Make2 = Noecars$Make
Noecars
makes = top_10$Make
makes
  [1] "Tesla"      "Tesla"      "MG"         "Tesla"      "Tesla"      "BMW"        "Volvo"      "Citroen"    "BMW"        "BMW"       
 [11] "Hyundai"    "Tesla"      "Tesla"      "BMW"        "Fiat"       "Tesla"      "BMW"        "BMW"        "Opel"       "MG"        
 [21] "BMW"        "Volvo"      "Volkswagen" "BMW"        "Audi"       "MG"         "Hyundai"    "Volkswagen" "Peugeot"    "Audi"      
 [31] "BMW"        "Hyundai"    "Hyundai"    "Volkswagen" "Volkswagen" "Citroen"    "MG"         "Hyundai"    "Tesla"      "Volvo"     
 [41] "BMW"        "Hyundai"    "Opel"       "MG"         "Mercedes"   "Volvo"      "BMW"        "Audi"       "MG"         "Hyundai"   
 [51] "Tesla"      "Mercedes"   "Hyundai"    "Peugeot"    "Tesla"      "Volvo"      "Hyundai"    "Volkswagen" "Audi"       "BMW"       
 [61] "Audi"       "BMW"        "Fiat"       "MG"         "Volkswagen" "Mercedes"   "Citroen"    "Volvo"      "Peugeot"    "MG"        
 [71] "MG"         "Audi"       "Volvo"      "Mercedes"   "Mercedes"   "NIO"        "Volkswagen" "NIO"        "Volvo"      "Mercedes"  
 [81] "Volvo"      "Mercedes"   "Peugeot"    "Mercedes"   "Opel"       "Volkswagen" "Citroen"    "Volvo"      "Volkswagen" "Mercedes"  
 [91] "Mercedes"   "Mercedes"   "Peugeot"    "Hyundai"    "Audi"       "NIO"        "Audi"       "BMW"        "Audi"       "Porsche"   
[101] "Porsche"    "BMW"        "Opel"       "Volvo"      "Volkswagen" "Fiat"       "Opel"       "Citroen"    "Mercedes"   "NIO"       
[111] "Citroen"    "Mercedes"   "MG"         "Mercedes"   "Mercedes"   "Porsche"    "Peugeot"    "NIO"        "NIO"        "Mercedes"  
[121] "Mercedes"   "Audi"       "Fiat"       "Audi"       "NIO"        "Mercedes"   "Mercedes"   "NIO"        "Mercedes"   "Audi"      
[131] "Porsche"    "Mercedes"   "Mercedes"   "Mercedes"   "Porsche"    "Mercedes"   "Porsche"    "Mercedes"   "Opel"       "Mercedes"  
[141] "Audi"       "Audi"       "Fiat"       "Mercedes"   "Porsche"    "Mercedes"   "Mercedes"   "Citroen"    "Mercedes"   "Opel"      
[151] "NIO"        "Opel"       "Citroen"    "Mercedes"   "Mercedes"   "Porsche"    "Peugeot"    "Peugeot"    "Peugeot"    "Porsche"   
[161] "Porsche"    "Mercedes"   "Mercedes"   "Mercedes"   "Citroen"    "NIO"        "Peugeot"    "Peugeot"    "Porsche"    "Porsche"   
[171] "Peugeot"    "Fiat"       "Citroen"    "Porsche"    "Citroen"    "Fiat"       "Porsche"    "Porsche"    "Citroen"    "Citroen"   
[181] "Opel"       "Citroen"    "Porsche"    "Opel"       "Porsche"    "Peugeot"    "Mercedes"   "Peugeot"    "Fiat"       "Porsche"   
[191] "Mercedes"   "Opel"       "Opel"       "Opel"       "Peugeot"    "Citroen"    "Fiat"       "Opel"       "Opel"       "Fiat"      
Noecars$Make3 = ifelse(Noecars$Make2 %in% makes, Noecars$Make2, "Other")
Noecars$Make2 %in% makes
  [1]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE
 [22] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE
 [43] FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
 [64] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE
 [85] FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
[106]  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE
[127] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE FALSE FALSE
[148]  TRUE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
[169]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
[190] FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
[211]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
[232] FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE
[253] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE
[274]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
[295]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
Noecars
ecars_make

is_outlier <- function(x) {
  return(x < quantile(x, 0.25) - 1.5 * IQR(x) | x > quantile(x, 0.75) + 1.5 * IQR(x))
}

dat <- ecars %>% tibble::rownames_to_column(var="outlier") %>%
  mutate(is_outlier=ifelse(is_outlier(Price), Price, as.numeric(NA)))

dat
dat$outlier[which(is.na(dat$is_outlier))] <- as.numeric(NA)

ggplot(dat, aes(y=Price, x=factor(0))) + geom_boxplot() + geom_text(aes(label=outlier),na.rm=TRUE,nudge_y=0.1)

ggplot(ecars_make, aes(Top_speed, Make, fill = Make)) +
   scale_fill_manual(values = make_colors)+
  geom_boxplot(outlier.colour="black", outlier.shape=16, outlier.size=2, notch=FALSE)
ecars %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(average_cost = (mean(Price)/1000)) %>%
  ggplot(., aes(x = Make, y = average_cost, fill = Make, label = Make)) +
  geom_bar(stat = 'identity') +
  scale_fill_manual(values = make_colors) +
  ylab('Average cost in Germany in 100s of eruros')+
  theme(axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank()) +
  geom_text(angle = 90, position = position_stack(vjust = 0.5)) + 
  theme(legend.position = "none")
ecars
top_10 = ecars %>% group_by(Make) %>%
  filter(n() >= 10) #%>%
  #summarise(average_cost = mean(Price))
top_10



make_colors = c('#e6194b', '#f58231',  '#ffe119', 
                '#bcf60c','#3cb44b', '#008080',
                '#aaffc3', '#4363d8', '#000075',
                '#46f0f0', '#911eb4', '#e6beff',
                '#f032e6', '#fabebe')


ecars %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(., aes(x = Battery, y = Price)) +
  geom_point(aes(col = Make), size = 2) + 
  scale_color_manual(values = make_colors)



ecars %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(., aes(x = Efficiency, y = Price)) +
  geom_point(aes(col = Make)) 


efficiency_make = ecars %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(., aes(x = Efficiency, y = Price)) +
  geom_point(aes(col = Make), show.legend = FALSE) +
  facet_wrap(~Make)+
  scale_color_manual(values = make_colors)

efficiency_make


battery_make = ecars %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(., aes(x = Battery, y = Price)) +
  geom_point(aes(col = Make), show.legend = FALSE) +
  facet_wrap(~Make)
battery_make

#ggsave('battery_make.png', width = 15, height = 9)

#ggsave('efficiency_make.png', width = 15, height = 9)

#keep working on this'
battery_price = ggplot(NULL, aes(x = 'Battery', y = Price)) +
  geom_point(data = ecars, aes(x = Battery, y = Price)) +
  geom_point(data = top_10, aes(x = Battery, y = Price, col = Make))# + scale_color_manual(values = make_colors)#

battery_price
ggsave('battery_price.png', width = 10)
Saving 10 x 4.51 in image

#ecars %>% group_by(Make)%>%
 # summarise(average_cost = mean(Price)) %>%
  #arrange(average_cost)
ecars %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(., aes(x = Battery, y = Price)) +
  geom_point(aes(col = Make), show.legend = FALSE) 

BatvPrice = ggplot(NULL, aes(x = Battery, y = Price)) +
  geom_point(data = ecars, aes(x = Battery, y = Price, fill = 'black')) +
  geom_point(data = top_10, aes(x = Battery, y = Price, col = Make)) + 
  scale_color_manual(values = make_colors)+
  xlab("Battery Capacity (kWh)") + 
  ylab("Price in Germany (euros) ") +
  ggtitle('Electric Vehicle Battery vs. Price (makes with 10+ Models Highlighted)') +
  labs(fill="") +
  scale_fill_discrete(labels=c('Other')) +
  theme(legend.position = "bottom")

BatvPrice

ggsave('BatvPrice.png', width = 10)

ecars
ggarrange(a, b, c, d, 
          labels = c("A", "B", "C", "D"),
          ncol = 2, nrow = 2)
BatvRange = ggplot(NULL, aes(x = Battery, y = Range)) +
  geom_point(data = ecars, aes(x = Battery, y = Range, fill = 'black')) +
  geom_point(data = top_10, aes(x = Battery, y = Range, col = Make)) + 
  scale_color_manual(values = make_colors)+
  xlab("Battery Capacity (kWh)") + 
  ylab("Range (km on one charge) ") +
  ggtitle('Electric Vehicle Battery vs. Range (makes with 10+ Models Highlighted)') +
  labs(fill="") +
  scale_fill_discrete(labels=c('Other')) +
  theme(legend.position = "bottom")


AccvPrice = ggplot(NULL, aes(x = Acceleration, y = Price)) +
  geom_point(data = ecars, aes(x = Acceleration, y = Price, fill = 'black')) +
  geom_point(data = top_10, aes(x = Acceleration, y = Price, col = Make)) + 
  scale_color_manual(values = make_colors)+
  xlab("Acceleration (seconds to 100 km/hr)") + 
  ylab("Price in Germany (euros) ") +
  ggtitle('Acceleration vs. Price (makes with 10+ Models Highlighted)') +
  labs(fill="") +
  xlim(2,15)+
  scale_fill_discrete(labels=c('Other')) +
  theme(legend.position = "bottom")

BatvRange
BatvPrice
AccvPrice
unique(ecars$Make)
unique(ecars_missing_price$Make)

length(unique(ecars$Make))
length(unique(ecars_missing_price$Make))

ecars_missing_price
ecars
plot(ecars[,4:10], main = 'Comparison of all Quantitive Features')
plot(ecars$Acceleration, ecars$Battery)

ecars$Price_pow = (ecars$Price ** -.5)
ecars$Speed_pow = (ecars$Top_speed ** .25)
ecars$Efficiency_pow = (ecars$Efficiency ** .25)
summary(ecars)
sapply(ecars, sd)
hist(ecars$Price) 
hist(ecars$Price_pow) 
hist(ecars$Efficiency)
hist(ecars$Price_pow)
ecars

Collinearity when predicting Range based on Battery

# Battery vs. price for makes with at least 10 models.
# The original pipeline had a stray `o` after the pipe and ended in a dangling
# `+`, which made the parser absorb the RBmodel assignment below. A point
# layer is added so the plot actually draws the data.
ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Battery, y = Price)) +
  geom_point(aes(colour = Make))

# Single-predictor regressions between pairs of features; plot() shows the
# standard lm diagnostic plots for each fit.
RBmodel <- lm(Range ~ Battery, data = ecars)
summary(RBmodel)
plot(RBmodel)

PBmodel <- lm(Price ~ Battery, data = ecars)
summary(PBmodel)
plot(PBmodel)

ATmodel <- lm(Top_speed ~ Acceleration, data = ecars)
summary(ATmodel)
plot(ATmodel)

# Price model fitted on the power-transformed response and predictors.
model <- lm(
  Price_pow ~ Efficiency_pow + Range + Speed_pow + Fast_charge,
  data = ecars
)
summary(model)
plot(model)

#battery speed_pow efficiency and fast charge 

With transformation done to price

# Predict a price for the cars that have none. The predictors get the same
# power transforms that were applied to ecars before `model` was fitted.
ecars_missing_price$Speed_pow <- ecars_missing_price$Top_speed^0.25
ecars_missing_price$Efficiency_pow <- ecars_missing_price$Efficiency^0.25
test <- predict(model, ecars_missing_price, interval = "prediction")

# The response is Price^-0.5, so (1 / p)^2 maps predictions back to euros.
test_2_dollars <- (1 / test)^2
# That back-transform is decreasing: the lower bound on the transformed scale
# is the upper bound in euros. Swap the columns so "lwr" < "fit" < "upr".
test_2_dollars[, c("lwr", "upr")] <-
  test_2_dollars[, c("upr", "lwr"), drop = FALSE]

ecars_missing_price
test_2_dollars
ecars_missing_price$predicted_price <- test_2_dollars[, "fit"]
ecars_missing_price
# In-sample predictions with prediction and confidence intervals, mapped back
# from the Price^-0.5 scale to euros with (1 / p)^2.
test2 <- predict(model, ecars, interval = "prediction")
test3 <- predict(model, ecars, interval = "confidence")
test2_2_dollars <- (1 / test2)^2
test3_2_dollars <- (1 / test3)^2

# The back-transform is decreasing, so each transformed-scale lower bound is
# the upper bound in euros. Swap the columns so that "lwr"/"upr" (and the
# *_lwr / *_upr columns built from them below) carry the right values.
test2_2_dollars[, c("lwr", "upr")] <-
  test2_2_dollars[, c("upr", "lwr"), drop = FALSE]
test3_2_dollars[, c("lwr", "upr")] <-
  test3_2_dollars[, c("upr", "lwr"), drop = FALSE]
test2_2_dollars
test3_2_dollars

# One row per car; every price column is in thousands of euros.
test2_2_dollarsnew <- data.frame(
  Name = ecars$Car_name,
  Make = ecars$Make,
  Price = ecars$Price / 1000,
  Predicted_price = test2_2_dollars[, "fit"] / 1000,
  Predicted_price_lwr = test2_2_dollars[, "lwr"] / 1000,
  Predicted_price_upr = test2_2_dollars[, "upr"] / 1000,
  Confidence_price_lwr = test3_2_dollars[, "lwr"] / 1000,
  Confidence_price_upr = test3_2_dollars[, "upr"] / 1000
)
test2_2_dollarsnew

# Average actual and predicted price for the makes with 10 or more models.
most_makes <- test2_2_dollarsnew %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_cost = mean(Price),
    average_predicted_cost = mean(Predicted_price)
  )



# Actual vs. predicted price (thousands of euros) for every priced car:
# black = fitted line, red = prediction interval, blue = confidence interval.
# Diamonds mark the per-make averages held in most_makes.
#
# The diamonds' fill is now mapped to Make and coloured through
# scale_fill_manual(). Passing make_colors as a constant `fill` only worked
# when most_makes had exactly as many rows as the palette has colours. The
# former scale_color_manual() call had no colour aesthetic to act on, and the
# second, identical definition of gplt1 was dropped.
gplt1 <- ggplot(test2_2_dollarsnew, aes(x = Predicted_price, y = Price)) +
  geom_line(aes(y = Predicted_price)) +
  geom_line(aes(y = Predicted_price_lwr), colour = "red") +
  geom_line(aes(y = Predicted_price_upr), colour = "red") +
  geom_line(aes(y = Confidence_price_lwr), colour = "blue") +
  geom_line(aes(y = Confidence_price_upr), colour = "blue") +
  ylim(0, 250) +
  geom_point(alpha = 0.5) +
  geom_point(
    data = most_makes,
    aes(x = average_predicted_cost, y = average_cost, fill = Make),
    size = 3,
    shape = 23
  ) +
  scale_fill_manual(values = make_colors)

gplt1

# Save this plot explicitly instead of relying on the last plot displayed.
ggsave("gplt1.png", plot = gplt1, width = 15)
test2_2_dollarsnew %>%
  arrange(Predicted_price)
# Interactive box plot of price with the car name available on hover.
# `tooltip` is not a plotly trace attribute (plotly warns and ignores it);
# `hoverinfo` is the trace-level setting that selects what the hover shows.
# NOTE(review): plot_ly()/layout() need the plotly package, which is not
# attached in the visible library chunk, and ecars_make is not created in the
# visible source — confirm both before running.
plot_ly(
  data = ecars_make,
  x = ~Price,
  type = "box",
  text = ~Car_name,
  name = "",
  hoverinfo = "x+text"
) %>%
  layout(
    title = "Box Plot of Selected Feature",
    yaxis = list(title = ""),
    xaxis = list(title = "")
  )
Warning: 'box' objects don't have these attributes: 'tooltip'
Valid attributes include:
'alignmentgroup', 'boxmean', 'boxpoints', 'customdata', 'customdatasrc', 'dx', 'dy', 'fillcolor', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'jitter', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'lowerfence', 'lowerfencesrc', 'marker', 'mean', 'meansrc', 'median', 'mediansrc', 'meta', 'metasrc', 'name', 'notched', 'notchspan', 'notchspansrc', 'notchwidth', 'offsetgroup', 'opacity', 'orientation', 'pointpos', 'q1', 'q1src', 'q3', 'q3src', 'quartilemethod', 'sd', 'sdsrc', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'upperfence', 'upperfencesrc', 'visible', 'whiskerwidth', 'width', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
Warning: 'box' objects don't have these attributes: 'tooltip'
Valid attributes include:
'alignmentgroup', 'boxmean', 'boxpoints', 'customdata', 'customdatasrc', 'dx', 'dy', 'fillcolor', 'hoverinfo', 'hoverinfosrc', 'hoverlabel', 'hoveron', 'hovertemplate', 'hovertemplatesrc', 'hovertext', 'hovertextsrc', 'ids', 'idssrc', 'jitter', 'legendgroup', 'legendgrouptitle', 'legendrank', 'line', 'lowerfence', 'lowerfencesrc', 'marker', 'mean', 'meansrc', 'median', 'mediansrc', 'meta', 'metasrc', 'name', 'notched', 'notchspan', 'notchspansrc', 'notchwidth', 'offsetgroup', 'opacity', 'orientation', 'pointpos', 'q1', 'q1src', 'q3', 'q3src', 'quartilemethod', 'sd', 'sdsrc', 'selected', 'selectedpoints', 'showlegend', 'stream', 'text', 'textsrc', 'transforms', 'type', 'uid', 'uirevision', 'unselected', 'upperfence', 'upperfencesrc', 'visible', 'whiskerwidth', 'width', 'x', 'x0', 'xaxis', 'xcalendar', 'xhoverformat', 'xperiod', 'xperiod0', 'xperiodalignment', 'xsrc', 'y', 'y0', 'yaxis', 'ycalendar', 'yhoverformat', 'yperiod', 'yperiod0', 'yperiodalignment', 'ysrc', 'key', 'set', 'frame', 'transforms', '_isNestedKey', '_isSimpleKey', '_isGraticule', '_bbox'
ecars[,5]
  [1] 172 137 183 171 149 164 197 173 176 170 157 158 170 164 181 168 170 192 159 180 165 178 166 152 171 179 167 185 199 188 175 185
 [33] 180 162 208 210 169 165 169 168 166 165 226 162 210 171 240 161 156 168 206 149 164 193 166 223 177 186 160 172 175 190 164 185
 [65] 176 220 168 178 166 148 178 193 197 200 171 201 195 177 159 191 202 185 171 176 230 160 199 168 213 200 200 170 192 169 209 203
 [97] 183 169 202 170 183 183 217 167 175 192 174 176 156 169 224 231 158 171 162 190 168 191 165 169 175 218 200 185 203 209 188 177
[129] 177 230 164 290 176 204 182 237 180 175 198 197 211 158 191 188 223 169 201 170 168 240 188 169 217 176 153 193 181 154 184 154
[161] 159 238 183 187 164 164 184 205 155 190 167 195 178 177 173 196 233 186 173 208 169 186 156 179 158 157 228 226 290 215 205 154
[193] 169 178 174 202 176 195 232 209 214 178 188 186 176 193 200 174 173 200 159 195 178 211 193 203 183 208 186 177 188 168 169 221
[225] 200 184 174 179 190 208 221 215 257 180 201 221 186 194 220 162 218 257 226 171 206 195 187 250 168 221 262 171 262 201 226 257
[257] 195 295 188 257 226 262 195 190 206 187 286 290 286 168 203 257 207 257 250 197 197 262 262 257 204 226 158 188 212 262 250 250
[289] 262 195 232 192 250 293 250 257 207 286 262 250 257 250 257 250 257 257 257
options(repr.plot.width = 15, repr.plot.height =2) 

# Actual vs. predicted price with the fitted line and interval bounds, plus
# the per-make averages from most_makes.
# The averages layer used to sit after a commented-out `+`, so it was
# evaluated on its own and never drawn. It also put the actual average on x
# although x is the predicted price. It now has its own data and matches the
# plot's axes.
ggplot(data = test2_2_dollarsnew, aes(x = Predicted_price, y = Price)) +
  geom_point() +
  geom_line(aes(y = Predicted_price)) +
  geom_line(aes(y = Predicted_price_lwr), col = "red") +
  geom_line(aes(y = Predicted_price_upr)) +
  geom_line(aes(y = Confidence_price_lwr)) +
  geom_line(aes(y = Confidence_price_upr)) +
  geom_point(
    data = most_makes,
    aes(x = average_predicted_cost, y = average_cost)
  )


  
new
new
new %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(average_cost = (mean(Price)/1000), average_predicted_cost = (mean(Predicted_price)/1000)) #%>%
  #ggplot(., aes(x = Make, y = average_predicted_cost)) +
  #geom_col(aes(fill = Make)) + 
  #ylab('Average cost in Germany in thousands of Eruros')+
  #theme(axis.title.x=element_blank(),
   #     axis.text.x=element_blank(),
    #    axis.ticks.x=element_blank())
new %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(average_price = (mean(Price)/1000), average_predicted_price =  (mean(Predicted_price)/1000))%>%
  ggplot(., aes(x = average_predicted_price, y = average_price)) +
  geom_point(aes(color = Make), size =4) 
  
new
# Average actual vs. average predicted price (thousands) for makes with at
# least 10 models, with a y = x reference line.
# NOTE(review): `new` and `newdata` are not created anywhere in the visible
# source; they presumably existed in the interactive session — confirm.
new %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_price = mean(Price) / 1000,
    average_predicted_price = mean(Predicted_price) / 1000
  ) %>%
  ggplot(aes(x = average_predicted_price, y = average_price)) +
  geom_point(aes(color = Make), size = 4) +
  geom_line(aes(y = average_predicted_price))

# The plot above used to end in `+`, which tried to add the predict() result
# to the ggplot object. The predictions are separate statements.
predict(model, newdata, interval = "confidence")
predict(model, newdata, interval = "prediction")
new
new %>% group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(average_price = (mean(Price)/1000), average_predicted_price = (mean(Predicted_price)/1000))
ecars_missing_price
pre
model
ecars
model.empty = lm(Price_pow ~ 1, data = ecars)
model.full = lm(Price_pow ~ Efficiency_pow + Range + Battery + Speed_pow + Fast_charge + Acceleration, data = ecars)
scope = list(lower = formula(model.empty), upper = formula(model.full))
scope
forwardAIC = step(model.empty, scope, direction = 'forward', k = 2)

battery speed_pow efficiency and fast charge

model.empty2 = lm(Price ~ 1, data = ecars)
model.full2 = lm(Price ~ Efficiency + Range + Battery + Top_speed + Fast_charge + Acceleration, data = ecars)
scope2 = list(lower = formula(model.empty2), upper = formula(model.full2))
scope2
forwardAIC2 = step(model.empty2, scope2, direction = 'forward', k = 2)
model.empty3 = lm(Price_pow ~ 1, data = ecars)
model.full3 = lm(Price_pow ~ Efficiency + Range + Battery + Top_speed + Fast_charge + Acceleration, data = ecars)
scope3 = list(lower = formula(model.empty3), upper = formula(model.full3))
forwardAIC3 = step(model.empty3, scope3, direction = 'forward', k = 2)

I'm getting a lower AIC when I run these models without transforming the variables. Do I need to transform Price but not the others? I'm not sure what to do.

model.empty = lm(Price_pow ~ 1, data = ecars)
model.full = lm(Price_pow ~ Efficiency_pow + Range + Battery + Speed_pow + Fast_charge + Acceleration, data = ecars)
scope = list(lower = formula(model.empty), upper = formula(model.full))
scope
forwardAIC = step(model.empty, scope, direction = 'forward', k = 2)
broom::glance(model)
broom::glance(model.full)
model_best = lm(Price_pow ~ Efficiency_pow + Range + Battery + Speed_pow + Fast_charge, data = ecars)
broom::glance(model.full3)
broom::glance(model_best)
ecars
# Box-Cox search for a power transform of Price, followed by a refit on the
# transformed response.
model_box <- lm(
  Price ~ Top_speed + Range + Efficiency + Fast_charge + Acceleration,
  data = ecars
)
summary(model_box)
bc <- boxCox(model_box)

# which.max() always yields a single index. Comparing doubles with `==`
# against max() can return several positions, which would make lambda a
# vector and silently recycle it in the transform below.
lambda <- bc$x[which.max(bc$y)]
lambda

# Box-Cox transform of the price for the selected lambda.
Price.bc <- (ecars$Price^lambda - 1) / lambda
Price.bc
model.bc <- lm(
  Price.bc ~ Top_speed + Range + Efficiency + Fast_charge + Acceleration,
  data = ecars
)

summary(model.bc)
broom::glance(model.bc)

hist(Price.bc)

This is how to undo the lambda transformation:

((Price.bc*lambda) + 1)^(1/lambda)
ecars$Price
plot.new()
plot( x = 50, y = 54)
lines(predicted_price$Predicted, predicted_price$Predicted)
ecars_missing_price

This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.

---
title: "R Notebook"
output: html_notebook
---
About this file
Data Fields:

- Battery: The capacity of the vehicle's battery in kilowatt-hours (kWh).

- Car_name: The model name of the electric vehicle.

- Car_name_link: A direct link to the corresponding page on EV Database for more in-depth information.

- Efficiency: The energy efficiency rating of the vehicle in watt-hours per kilometer (Wh/km).

- Fast_charge: The fast-charging capability of the vehicle in minutes for a certain charging percentage.

- Price.DE.: The price of the electric vehicle in Germany.

- Range: The driving range of the vehicle on a single charge in kilometers.

- Top_speed: The maximum speed the vehicle can achieve in kilometers per hour.

- Acceleration..0.100.: The acceleration time from 0 to 100 kilometers per hour.
```{r}
library(dplyr)
library(ggplot2)
library(tidyr)
library(car)
library(MASS)
library(repr)

library(pals)

```


```{r}
# Read the raw EV listing from the working directory and display it.
ecars_raw <- read.csv(file = "EV_cars.csv")
ecars_raw

```

```{r}
# Give the price and acceleration columns shorter names.
ecars_raw <- rename(
  ecars_raw,
  Price = Price.DE.,
  Acceleration = acceleration..0.100.
)
```

```{r}

# Extract the make of each car (the first space-separated word of Car_name)
# into its own column.
make <- strsplit(ecars_raw$Car_name, split = " ")

# vapply() replaces the `for (i in 1:n)` loop: it does not grow the vector one
# element at a time, checks that every result is a single string, and also
# works for zero rows (where 1:n would iterate over c(1, 0) and fail).
make_ <- vapply(make, function(words) words[1], character(1))

ecars_raw$Make <- make_
```

```{r}
# Reorder columns so the continuous variables sit together: Make goes just
# before Car_name_link and Battery just after it.
ecars_raw <- ecars_raw %>%
  relocate(Make, .before = Car_name_link) %>%
  relocate(Battery, .after = Car_name_link)
ecars_raw
```
```{r}
# Drop cars without a Fast_charge value, then split the rest by whether a
# price is present.
ecars_raw <- filter(ecars_raw, !is.na(Fast_charge))
ecars <- filter(ecars_raw, !is.na(Price))
ecars_missing_price <- filter(ecars_raw, is.na(Price))
```

This data required minimal processing. I created a Make variable by extracting the first word from the Car_name variable. I also renamed several columns to make them more intuitive, for example acceleration..0.100. to Acceleration. I removed the two cars that did not have Fast Charge capability (the Renault Twingo Electric and the e.Go e.wave X) because this was an important feature in the linear regression and was impacting their price. Finally, I made sure all the continuous variables were next to each other to simplify calling them. I split the data frame into two: one with prices (307 objects) and one with missing prices (51 objects).

After cleaning the data, 45 unique car makes were included in the ecars data used to create the linear model, and 22 unique car makes were included in the data with missing prices. Additionally, the 14 makes that have 10 or more car models are highlighted throughout the project.

```{r}
# Number of cars that have a price.
nrow(ecars)
```
```{r}
ecars
library(tibble)

# Horizontal box plot of Price. The constant y factor puts all cars in one
# group, so the y-axis title, labels and ticks are hidden.
ggplot(data = ecars, mapping = aes(x = Price, y = factor(0))) +
  geom_boxplot() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )



```
```{r}
# Copy of the priced cars in which makes with fewer than 10 models are
# collapsed into "Other" (column Make3).
Noecars <- ecars

Noecars$Make2 <- Noecars$Make
Noecars

# Derive the frequent makes directly from ecars. top_10 is only created in a
# later chunk, so reading top_10$Make here fails when the notebook runs top
# to bottom. This pipeline yields the same vector (one entry per car).
makes <- ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  pull(Make)
makes

Noecars$Make3 <- ifelse(Noecars$Make2 %in% makes, Noecars$Make2, "Other")
Noecars$Make2 %in% makes
Noecars
```

```{r}
ecars_make

#' Flag values that lie outside the 1.5 * IQR box-plot fences.
#'
#' @param x Numeric vector; may contain `NA`.
#' @return Logical vector the same length as `x`: `TRUE` where a value is more
#'   than 1.5 * IQR below the first quartile or above the third quartile,
#'   `NA` where `x` is `NA`.
is_outlier <- function(x) {
  # na.rm = TRUE: quantile() stops with an error on missing values otherwise.
  # The quartiles are computed once and reused for the IQR.
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  reach <- 1.5 * (quartiles[2] - quartiles[1])
  x < quartiles[1] - reach | x > quartiles[2] + reach
}

# Tag every priced car with its row name, then keep that tag only for price
# outliers so that only those points get a text label on the box plot.
dat <- ecars %>%
  tibble::rownames_to_column(var = "outlier") %>%
  mutate(is_outlier = ifelse(is_outlier(Price), Price, NA_real_))

dat
dat$outlier[is.na(dat$is_outlier)] <- NA_character_

ggplot(dat, aes(x = factor(0), y = Price)) +
  geom_boxplot() +
  geom_text(aes(label = outlier), na.rm = TRUE, nudge_y = 0.1)

# Top speed distribution per make, filled with the make palette.
ggplot(ecars_make, aes(x = Top_speed, y = Make, fill = Make)) +
  geom_boxplot(
    outlier.colour = "black",
    outlier.shape = 16,
    outlier.size = 2,
    notch = FALSE
  ) +
  scale_fill_manual(values = make_colors)
```

```{r}
# Average price, in thousands of euros, for each make with at least 10 models.
# The make name is printed vertically inside its bar, so the x axis and the
# legend are hidden.
# NOTE: make_colors is assigned in a later chunk of this notebook and must
# exist before this chunk is run.
ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(average_cost = mean(Price) / 1000) %>%
  ggplot(aes(x = Make, y = average_cost, fill = Make, label = Make)) +
  geom_col() +
  scale_fill_manual(values = make_colors) +
  # mean(Price) / 1000 is in thousands; the label used to say "100s of eruros".
  ylab("Average cost in Germany in 1000s of euros") +
  geom_text(angle = 90, position = position_stack(vjust = 0.5)) +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    legend.position = "none"
  )
```
```{r}
ecars
```

```{r}
# Cars whose make has at least 10 models; the result stays grouped by Make.
top_10 <- filter(group_by(ecars, Make), n() >= 10)
# Disabled follow-up step: summarise(average_cost = mean(Price))
top_10



# Fixed 14-colour palette used for the highlighted makes.
make_colors <- c(
  "#e6194b", "#f58231", "#ffe119", "#bcf60c", "#3cb44b", "#008080", "#aaffc3",
  "#4363d8", "#000075", "#46f0f0", "#911eb4", "#e6beff", "#f032e6", "#fabebe"
)


# Price against battery capacity and against efficiency for the makes with at
# least 10 models. Every plot applies make_colors so a make keeps the same
# colour across figures (two of the four used the default palette before).
ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Battery, y = Price)) +
  geom_point(aes(colour = Make), size = 2) +
  scale_color_manual(values = make_colors)


ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Efficiency, y = Price)) +
  geom_point(aes(colour = Make)) +
  scale_color_manual(values = make_colors)

# One panel per make; the legend is dropped because the panel title already
# names the make.
efficiency_make <- ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Efficiency, y = Price)) +
  geom_point(aes(colour = Make), show.legend = FALSE) +
  facet_wrap(~Make) +
  scale_color_manual(values = make_colors)

efficiency_make

battery_make <- ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Battery, y = Price)) +
  geom_point(aes(colour = Make), show.legend = FALSE) +
  facet_wrap(~Make) +
  scale_color_manual(values = make_colors)
battery_make
#ggsave('battery_make.png', width = 15, height = 9)

#ggsave('efficiency_make.png', width = 15, height = 9)

# TODO: keep working on this.
# Battery vs. price for all priced cars, with the 10+ model makes drawn on top
# in colour. The base mapping used the string 'Battery' instead of the column,
# which mapped x to a constant and titled the axis "x".
battery_price <- ggplot(NULL, aes(x = Battery, y = Price)) +
  geom_point(data = ecars) +
  geom_point(data = top_10, aes(col = Make))
# Disabled: + scale_color_manual(values = make_colors)

battery_price
# Save this plot explicitly instead of relying on the last plot displayed.
ggsave("battery_price.png", plot = battery_price, width = 10)

#ecars %>% group_by(Make)%>%
 # summarise(average_cost = mean(Price)) %>%
  #arrange(average_cost)
```
```{r}
# Battery vs. Price for makes with 10 or more rows, coloured by make,
# legend suppressed.
ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Battery, y = Price, colour = Make)) +
  geom_point(show.legend = FALSE)

# Battery vs. Price for every car, with the `top_10` rows overplotted in the
# manual palette. The constant `fill` mapping on the base layer exists only to
# produce an "Other" legend entry. Both layers inherit x/y from the plot.
BatvPrice <- ggplot(mapping = aes(x = Battery, y = Price)) +
  geom_point(aes(fill = 'black'), data = ecars) +
  geom_point(aes(col = Make), data = top_10) +
  scale_color_manual(values = make_colors) +
  scale_fill_discrete(labels = c('Other')) +
  labs(
    x = "Battery Capacity (kWh)",
    y = "Price in Germany (euros) ",
    title = 'Electric Vehicle Battery vs. Price (makes with 10+ Models Highlighted)',
    fill = ""
  ) +
  theme(legend.position = "bottom")

BatvPrice

# Saves the most recently displayed plot (BatvPrice, shown just above).
ggsave('BatvPrice.png', width = 10)

ecars
```
```{r}
# Arrange four plot objects in a 2 x 2 grid with panel labels A-D.
# NOTE(review): `a`, `b`, `c`, `d` are not defined in the visible part of the
# notebook, and `ggarrange` is not provided by the packages loaded at the top
# of the file (presumably ggpubr) — confirm both before running this chunk.
ggarrange(a, b, c, d, 
          labels = c("A", "B", "C", "D"),
          ncol = 2, nrow = 2)
```


```{r}
# Battery vs. Range for every car, with the `top_10` rows overplotted in the
# manual palette. The constant `fill` mapping on the base layer exists only to
# produce an "Other" legend entry. Both layers inherit x/y from the plot.
BatvRange <- ggplot(mapping = aes(x = Battery, y = Range)) +
  geom_point(aes(fill = 'black'), data = ecars) +
  geom_point(aes(col = Make), data = top_10) +
  scale_color_manual(values = make_colors) +
  scale_fill_discrete(labels = c('Other')) +
  labs(
    x = "Battery Capacity (kWh)",
    y = "Range (km on one charge) ",
    title = 'Electric Vehicle Battery vs. Range (makes with 10+ Models Highlighted)',
    fill = ""
  ) +
  theme(legend.position = "bottom")

# Acceleration vs. Price, same layering; the x axis is limited to 2-15.
AccvPrice <- ggplot(mapping = aes(x = Acceleration, y = Price)) +
  geom_point(aes(fill = 'black'), data = ecars) +
  geom_point(aes(col = Make), data = top_10) +
  scale_color_manual(values = make_colors) +
  xlim(2, 15) +
  scale_fill_discrete(labels = c('Other')) +
  labs(
    x = "Acceleration (seconds to 100 km/hr)",
    y = "Price in Germany (euros) ",
    title = 'Acceleration vs. Price (makes with 10+ Models Highlighted)',
    fill = ""
  ) +
  theme(legend.position = "bottom")

# Display the three plots.
BatvRange
BatvPrice
AccvPrice
```




```{r}
# Distinct makes in the priced data and in the data with missing prices.
unique(ecars[["Make"]])
unique(ecars_missing_price[["Make"]])

# Number of distinct makes in each data set.
length(unique(ecars[["Make"]]))
length(unique(ecars_missing_price[["Make"]]))

# Show the rows whose price is missing.
ecars_missing_price
```

```{r}
ecars
# Scatter-plot matrix of columns 4 to 10 (selected by position).
plot(ecars[, 4:10], main = 'Comparison of all Quantitive Features')
plot(ecars$Acceleration, ecars$Battery)

# Power transforms used by the regression models further down.
ecars$Price_pow <- ecars$Price^(-0.5)
ecars$Speed_pow <- ecars$Top_speed^0.25
ecars$Efficiency_pow <- ecars$Efficiency^0.25
summary(ecars)
# Standard deviation of the numeric columns only; sd() is not meaningful for
# the character columns (car name, make, link).
vapply(Filter(is.numeric, ecars), sd, numeric(1))

# Raw vs. transformed distributions.
hist(ecars$Price)
hist(ecars$Price_pow)
hist(ecars$Efficiency)
# Previously a second hist(ecars$Price_pow); the raw/transformed pairing
# suggests the transformed efficiency was intended here.
hist(ecars$Efficiency_pow)
ecars
```
Collinearity when predicting Range based on Battery

```{r}
# Battery vs. Price for makes with at least 10 models.
# The chunk previously failed to parse: a stray `o` followed the pipe and the
# expression ended on a dangling `+`. A point layer coloured by Make is added
# so the plot renders, in line with the other Battery/Price plots in this
# notebook.
ecars %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  ggplot(aes(x = Battery, y = Price)) +
  geom_point(aes(col = Make))
```

```{r}

# Simple regression: Range on Battery.
RBmodel <- lm(formula = Range ~ Battery, data = ecars)
summary(RBmodel)
plot(RBmodel)

# Simple regression: Price on Battery.
PBmodel <- lm(formula = Price ~ Battery, data = ecars)
summary(PBmodel)
plot(PBmodel)

# Simple regression: Top_speed on Acceleration.
ATmodel <- lm(formula = Top_speed ~ Acceleration, data = ecars)
summary(ATmodel)
plot(ATmodel)

# Main model: transformed price on transformed efficiency, range,
# transformed top speed and fast-charge rate. Later chunks predict from it.
model <- lm(
  formula = Price_pow ~ Efficiency_pow + Range + Speed_pow + Fast_charge,
  data = ecars
)
summary(model)
plot(model)

# battery speed_pow efficiency and fast charge

```
With transformation done to price 
```{r}
# Recreate the transformed predictors that `model` was fitted on.
ecars_missing_price$Speed_pow <- ecars_missing_price$Top_speed^0.25
ecars_missing_price$Efficiency_pow <- ecars_missing_price$Efficiency^0.25

# Point prediction plus prediction interval on the Price_pow scale.
test <- predict(model, ecars_missing_price, interval = 'prediction')

# Back-transform to price: Price_pow = Price^(-0.5), so Price = (1 / Price_pow)^2.
# This transform is decreasing for positive values, so the interval's lower
# bound on the Price_pow scale becomes the upper bound on the price scale (and
# vice versa). Swap the two columns so the `lwr`/`upr` labels stay correct.
# Assumes all predicted Price_pow values are positive.
test_2_dollars <- (1 / test)^2
test_2_dollars[, c("lwr", "upr")] <- test_2_dollars[, c("upr", "lwr")]

ecars_missing_price
test_2_dollars
ecars_missing_price$predicted_price <- test_2_dollars[, "fit"]
ecars_missing_price
```
```{r}
# In-sample predictions on the Price_pow scale, with prediction and
# confidence intervals.
test2 <- predict(model, ecars, interval = 'prediction')
test3 <- predict(model, ecars, interval = 'confidence')

# Back-transform to price: Price_pow = Price^(-0.5), so Price = (1 / Price_pow)^2.
# The transform is decreasing for positive values, so each interval's bounds
# trade places: the `lwr` column on the Price_pow scale is the UPPER price
# bound. Swap the columns so `lwr`/`upr` mean what they say on the price scale.
# Assumes all predicted Price_pow values are positive.
test2_2_dollars <- (1 / test2)^2
test2_2_dollars[, c("lwr", "upr")] <- test2_2_dollars[, c("upr", "lwr")]
test3_2_dollars <- (1 / test3)^2
test3_2_dollars[, c("lwr", "upr")] <- test3_2_dollars[, c("upr", "lwr")]
test2_2_dollars
test3_2_dollars

# Actual price, fitted price and both intervals, all divided by 1000.
test2_2_dollarsnew <- data.frame(
  Name = ecars$Car_name,
  Make = ecars$Make,
  Price = ecars$Price / 1000,
  Predicted_price = test2_2_dollars[, "fit"] / 1000,
  Predicted_price_lwr = test2_2_dollars[, "lwr"] / 1000,
  Predicted_price_upr = test2_2_dollars[, "upr"] / 1000,
  Confidence_price_lwr = test3_2_dollars[, "lwr"] / 1000,
  Confidence_price_upr = test3_2_dollars[, "upr"] / 1000
)
test2_2_dollarsnew
```
```{r}
# Per-make averages of actual and predicted price, restricted to makes with
# at least 10 rows in `test2_2_dollarsnew`.
most_makes <- test2_2_dollarsnew %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_cost = mean(Price),
    average_predicted_cost = mean(Predicted_price)
  )
```

```{r}
# Horizontal box plot of Price; the dummy y axis (a single factor level) has
# its title, text and ticks removed.
test <- ggplot(ecars, aes(x = Price, y = factor(0))) +
  geom_boxplot() +
  labs(title = 'Box Plot of Selected Feature') +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )
test
```


```{r}


# Actual vs. predicted price. Black line: y = predicted; red lines: prediction
# interval bounds; blue lines: confidence interval bounds; translucent points:
# individual cars; diamonds: per-make averages from `most_makes`.
# `fill = make_colors` is a fixed vector, so it is matched to the rows of
# `most_makes` by position.
gplt1 <- ggplot(test2_2_dollarsnew, aes(x = Predicted_price, y = Price)) +
  geom_line(aes(y = Predicted_price)) +
  geom_line(aes(y = Predicted_price_lwr), col = 'red') +
  geom_line(aes(y = Predicted_price_upr), col = 'red') +
  geom_line(aes(y = Confidence_price_lwr), col = 'blue') +
  geom_line(aes(y = Confidence_price_upr), col = 'blue') +
  ylim(0, 250) +
  geom_point(alpha = .5) +
  geom_point(
    data = most_makes,
    aes(x = average_predicted_cost, y = average_cost),
    size = 3, shape = 23, fill = make_colors
  ) +
  scale_color_manual(values = make_colors)

gplt1

# Saves the most recently displayed plot (gplt1, shown just above).
ggsave('gplt1.png', width = 15)
         
```
```{r}
# Rebuild of the actual-vs-predicted plot (not displayed in this chunk).
# Black line: y = predicted; red: prediction interval bounds; blue: confidence
# interval bounds; diamonds: per-make averages from `most_makes`.
gplt1 <- ggplot(test2_2_dollarsnew, aes(x = Predicted_price, y = Price)) +
  geom_line(aes(y = Predicted_price)) +
  geom_line(aes(y = Predicted_price_lwr), col = 'red') +
  geom_line(aes(y = Predicted_price_upr), col = 'red') +
  geom_line(aes(y = Confidence_price_lwr), col = 'blue') +
  geom_line(aes(y = Confidence_price_upr), col = 'blue') +
  ylim(0, 250) +
  geom_point(alpha = .5) +
  geom_point(
    data = most_makes,
    aes(x = average_predicted_cost, y = average_cost),
    size = 3, shape = 23, fill = make_colors
  ) +
  scale_color_manual(values = make_colors)

```

```{r}
# Show the prediction table sorted by predicted price, ascending.
arrange(test2_2_dollarsnew, Predicted_price)
```
```{r}
ecars

# NOTE(review): `ecars_make` and the plotly package are not defined/loaded in
# the visible part of the notebook — confirm they exist before this chunk.

# Interactive box plot of Price per Make, coloured with the manual palette.
# `hoverinfo` selects what the hover label shows (x value plus the car name
# supplied via `text`); `tooltip` is a ggplotly() argument and is not a valid
# attribute of a plot_ly() box trace.
plot_ly(
  data = ecars_make,
  x = ~Price,
  y = ~Make,
  type = "box",
  color = ~Make,
  colors = make_colors,
  text = ~Car_name,
  hoverinfo = "x+text",
  showlegend = FALSE
)

# Single interactive box plot of Price across all rows of `ecars_make`.
plot_ly(
  data = ecars_make,
  x = ~Price,
  type = "box",
  text = ~Car_name,
  name = '',
  hoverinfo = "x+text"
) %>%
  layout(
    title = "Box Plot of Selected Feature",
    yaxis = list(title = ''),
    xaxis = list(title = '')
  )
```
```{r}
# Show the fifth column of `ecars`, selected by position.
# NOTE(review): which variable sits in position 5 depends on the column
# reordering done earlier — confirm it is the intended one.
ecars[,5]
```

```{r}
# Plot size for repr-based front ends.
options(repr.plot.width = 15, repr.plot.height = 2)

# Actual vs. predicted price with the y = predicted line and the prediction /
# confidence interval bounds.
ggplot(data = test2_2_dollarsnew, aes(x = Predicted_price, y = Price)) +
  geom_point() +
  geom_line(aes(x = Predicted_price, y = Predicted_price)) +
  geom_line(aes(x = Predicted_price, y = Predicted_price_lwr), col = 'red') +
  geom_line(aes(x = Predicted_price, y = Predicted_price_upr)) +
  geom_line(aes(x = Predicted_price, y = Confidence_price_lwr)) +
  geom_line(aes(x = Predicted_price, y = Confidence_price_upr))
# Disabled layer. Only its `+` had been commented out, which left the call
# below as a stand-alone statement that printed a layer object instead of
# being added to the plot. NOTE(review): its x/y are also the reverse of the
# plot's axes (x is predicted price, y is actual price).
# + geom_point(aes(x = most_makes$average_cost, y = most_makes$average_predicted_cost))

# NOTE(review): `new` is not defined in the visible part of the notebook.
new
```


```{r}
# `new` comes from outside the visible part of the notebook.
new
# Per-make averages of actual and predicted price (each divided by 1000) for
# makes with at least 10 rows.
new %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_cost = mean(Price) / 1000,
    average_predicted_cost = mean(Predicted_price) / 1000
  )
# Disabled bar-chart continuation of the pipeline above:
# %>%
#   ggplot(., aes(x = Make, y = average_predicted_cost)) +
#   geom_col(aes(fill = Make)) +
#   ylab('Average cost in Germany in thousands of Euros') +
#   theme(axis.title.x = element_blank(),
#         axis.text.x = element_blank(),
#         axis.ticks.x = element_blank())
```
```{r}
# Average actual vs. average predicted price (each divided by 1000) for makes
# with at least 10 rows in `new`, one coloured point per make.
new %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_price = mean(Price) / 1000,
    average_predicted_price = mean(Predicted_price) / 1000
  ) %>%
  ggplot(aes(x = average_predicted_price, y = average_price)) +
  geom_point(aes(color = Make), size = 4)
  
```



```{r}
# `new` comes from outside the visible part of the notebook.
new
# Average actual vs. average predicted price (each divided by 1000) for makes
# with at least 10 rows, plus the y = x reference line.
# The expression previously ended on a dangling `+`, which made the chunk fail
# to parse; the trailing operator is removed.
new %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_price = mean(Price) / 1000,
    average_predicted_price = mean(Predicted_price) / 1000
  ) %>%
  ggplot(., aes(x = average_predicted_price, y = average_price)) +
  geom_point(aes(color = Make), size = 4) +
  geom_line(aes(x = average_predicted_price, y = average_predicted_price))


```

```{r}
# Confidence and prediction intervals from `model` for the rows of `newdata`.
# Results are on the scale of the model response (Price_pow), not price.
# NOTE(review): `newdata` is not defined in the visible part of the notebook.
predict(model, newdata, interval = 'confidence')
predict(model, newdata, interval = 'prediction')
```


```{r}
# `new` comes from outside the visible part of the notebook.
new
# Per-make averages of actual and predicted price (each divided by 1000) for
# makes with at least 10 rows.
new %>%
  group_by(Make) %>%
  filter(n() >= 10) %>%
  summarise(
    average_price = mean(Price) / 1000,
    average_predicted_price = mean(Predicted_price) / 1000
  )
```

```{r}
# Display the cars without a listed price.
ecars_missing_price
# NOTE(review): `pre` is not defined in the visible part of the notebook —
# possibly an unfinished identifier; confirm or remove.
pre
```


```{r}
# Print the fitted lm object `model` (its call and coefficients).
model
```
```{r}
# Display the priced data, which by this point also carries the *_pow columns.
ecars
```


```{r}
# Forward selection by AIC (k = 2) for the transformed price, from the
# intercept-only model up to the full set of (partly transformed) predictors.
model.empty <- lm(formula = Price_pow ~ 1, data = ecars)
model.full <- lm(
  formula = Price_pow ~ Efficiency_pow + Range + Battery + Speed_pow +
    Fast_charge + Acceleration,
  data = ecars
)
scope <- list(
  lower = formula(model.empty),
  upper = formula(model.full)
)
scope
forwardAIC <- step(model.empty, scope = scope, direction = 'forward', k = 2)
```

battery speed_pow efficiency and fast charge 

```{r}
# Forward selection by AIC (k = 2) with the untransformed price as response
# and untransformed predictors.
model.empty2 <- lm(formula = Price ~ 1, data = ecars)
model.full2 <- lm(
  formula = Price ~ Efficiency + Range + Battery + Top_speed +
    Fast_charge + Acceleration,
  data = ecars
)
scope2 <- list(
  lower = formula(model.empty2),
  upper = formula(model.full2)
)
scope2
forwardAIC2 <- step(model.empty2, scope = scope2, direction = 'forward', k = 2)
```


```{r}
# Forward selection by AIC (k = 2) with the transformed price as response but
# untransformed predictors.
model.empty3 <- lm(formula = Price_pow ~ 1, data = ecars)
model.full3 <- lm(
  formula = Price_pow ~ Efficiency + Range + Battery + Top_speed +
    Fast_charge + Acceleration,
  data = ecars
)
scope3 <- list(
  lower = formula(model.empty3),
  upper = formula(model.full3)
)
forwardAIC3 <- step(model.empty3, scope = scope3, direction = 'forward', k = 2)
```
I'm getting a lower AIC when I run these without transforming the predictors. Does that mean I need to transform Price but not the other variables? I'm not sure what to do yet.

```{r}
# Re-run of the forward selection by AIC (k = 2) for the transformed price
# with the (partly transformed) predictors.
model.empty <- lm(formula = Price_pow ~ 1, data = ecars)
model.full <- lm(
  formula = Price_pow ~ Efficiency_pow + Range + Battery + Speed_pow +
    Fast_charge + Acceleration,
  data = ecars
)
scope <- list(
  lower = formula(model.empty),
  upper = formula(model.full)
)
scope
forwardAIC <- step(model.empty, scope = scope, direction = 'forward', k = 2)
```

```{r}
# Candidate model: the full transformed model without Acceleration.
model_best <- lm(
  formula = Price_pow ~ Efficiency_pow + Range + Battery + Speed_pow +
    Fast_charge,
  data = ecars
)

# One-row fit summaries for the competing models, in the original order.
broom::glance(model)
broom::glance(model.full)
broom::glance(model.full3)
broom::glance(model_best)
```


```{r}
ecars
# Untransformed-price model that the Box-Cox procedure is applied to.
model_box <- lm(
  formula = Price ~ Top_speed + Range + Efficiency + Fast_charge +
    Acceleration,
  data = ecars
)
summary(model_box)

```
```{r}
# Box-Cox profile for `model_box`; `bc$x` holds the candidate lambdas and
# `bc$y` the corresponding profile values.
bc <- boxCox(model_box)

# Lambda at the maximum of the profile. which.max() always returns a single
# index, whereas which(y == max(y)) can return several on ties and would turn
# `lambda` into a vector.
lambda <- bc$x[which.max(bc$y)]
lambda

# Box-Cox transform of the price (this form requires lambda != 0).
Price.bc <- (ecars$Price^lambda - 1) / lambda
Price.bc

# Refit with the transformed response.
model.bc <- lm(
  Price.bc ~ Top_speed + Range + Efficiency + Fast_charge + Acceleration,
  data = ecars
)

summary(model.bc)
broom::glance(model.bc)

hist(Price.bc)
```
**This is how to undo the lambda (Box-Cox) transformation:**
```{r}
# Invert the Box-Cox transform; the result can be compared with the original
# prices printed on the next line.
(Price.bc * lambda + 1)^(1 / lambda)
ecars$Price
```


```{r}
# Base-graphics scratch plot: open a new frame, draw a single point at
# (50, 54), then add a line whose x and y are both `predicted_price$Predicted`.
# NOTE(review): `predicted_price` is not defined in the visible part of the
# notebook — confirm it exists before running this chunk.
plot.new()
plot( x = 50, y = 54)
lines(predicted_price$Predicted, predicted_price$Predicted)
```

```{r}
# Display the cars without a listed price (including any columns added to
# this data frame by earlier chunks).
ecars_missing_price
```






This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 

Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Ctrl+Shift+Enter*. 

Add a new chunk by clicking the *Insert Chunk* button on the toolbar or by pressing *Ctrl+Alt+I*.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the *Preview* button or press *Ctrl+Shift+K* to preview the HTML file).

The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike *Knit*, *Preview* does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
